import os
import sys
sys.path.append(os.path.abspath(os.path.dirname(__file__) + '/' + '../..'))

import matplotlib as mpl

mpl.use('pgf')
pgf_with_latex = {                     
    "pgf.texsystem": "xelatex",        
    "pgf.rcfonts": False,
    "text.usetex": True,                
    "font.family": "Times New Roman",
    "pgf.preamble": [
        r"\usepackage{fontspec}",    
        r"\setmainfont{Times New Roman}",        
        r"\usepackage{unicode-math}",
        r"\setmathfont{XITS Math}"
        ]
    }    
mpl.rcParams.update(pgf_with_latex)

import numpy as np
import scipy
import pandas as pd
import matplotlib.pyplot as plt
import statsmodels.api as sm
import statsmodels.formula.api as smf

from python.tools import (
    clean_folder
)

from python.numerical.functions import (
    fit_all_models,
    get_boostrap_dataset,
    estimate_IRF_local_projection,
    construct_leads_lags
)

################
## Parameters ##
################

input_folder = './numerical/input'
output_folder = './numerical/output/calibration'
rho = 0.82 # Actual persistence
max_L = 12 # Number of bias coefficients to fit
eps = 1e-6 # Numerical tolerance
B = 2000 # Number of bootstrap replications
block_length = 20 # Block length for the bootstrap
max_K = 4 # Number of lags to include in local projections
theta_se_upper_bnd = 10.0 # Upper bound on theta when calculating bootstrap s.e.

#############################
## Get empirical estimates ##
#############################

clean_folder(output_folder)
np.random.seed(20222022)

# Read in aggregate bias coefficients
df_bias_coefs = pd.read_csv('{}/IRF_SPF_consensus.txt'.format(input_folder), sep = '\t')
df_bias_coefs['aggregate_empirical'] = -df_bias_coefs['IRF']
df_bias_coefs = df_bias_coefs[['lag', 'aggregate_empirical', 'se']]
df_bias_coefs.rename(columns = {'se': 'se_agg'},
                     inplace = True)

# Read in idiosyncratic bias coefficients
df_temp = pd.read_csv('{}/IRF_idiosyncratic.txt'.format(input_folder), sep = '\t')

# Disregard the first IRF (upon impact)
df_temp['lag'] = df_temp['lag'] - 1
df_temp['idiosyncratic_empirical'] = -df_temp['IRF']
df_temp = df_temp[['lag', 'idiosyncratic_empirical', 'se']]
df_temp.rename(columns = {'se': 'se_idio'},
               inplace = True)

# Merge in idiosyncratic coefficients
df_bias_coefs = pd.merge(df_bias_coefs, df_temp, on = 'lag', how = 'left')
df_bias_coefs.rename(columns = {'lag': 'L'},
                     inplace = True)

# Clean up
mask = (df_bias_coefs['L'] >= 1) & (df_bias_coefs['L'] <= max_L)
df_bias_coefs = df_bias_coefs.loc[mask, ]
df_bias_coefs.to_csv('{}/empirical_bias_coefs.csv'.format(output_folder), index = False)

##############################
## Perform fitting exercise ##
##############################

params_DGP = {'rho': rho}
res = fit_all_models(df_bias_coefs, params_DGP)
res.to_csv('{}/model_fit.csv'.format(output_folder), index = False)

###################################
## Get bootstrap standard errors ##
###################################

# Read individual-level dataset
df = pd.read_csv('{}/individual_dataset.csv'.format(input_folder))

res_bias_coefs = []
res_calibration = []

for bb in range(B):
    # Create bootstrap dataset
    bootstrap_data = get_boostrap_dataset(df_ind = df, block_length = block_length)
    df_ind  = bootstrap_data['individual']
    df_cons = bootstrap_data['consensus']

    # Create variables for regressions
    df_temp = df_ind.groupby('T').mean()['SPFfor_Step2'].reset_index()
    df_temp.rename(columns = {'SPFfor_Step2': 'Consensusfor_Step2'},
                   inplace = True)
    df_ind = pd.merge(df_ind, df_temp, on = 'T', how = 'left')
    df_ind['DevFromConsensusfor_Step2'] = df_ind['SPFfor_Step2'] - df_ind['Consensusfor_Step2']
    df_ind['Efor_Step2']  = df_ind['Realiz1'] - df_ind['SPFfor_Step2']
    df_cons['Efor_Step2'] = df_cons['Realiz1'] - df_cons['SPFfor_Step2']
    df_ind = construct_leads_lags(df_ind, 
        np.max([max_K, max_L]), var_names = ['DevFromConsensusfor_Step2', 'Efor_Step2'], 
        group_ID = 'ID')

    # Estimate aggregate bias coefficients
    # from consensus dataset
    IRF_cons = estimate_IRF_local_projection(df = df_cons, var_name = 'Efor_Step2', max_L = max_L, max_K = max_K)

    # Estimate idiosyncratic bias coefficients using
    # two-stage least squares. 

    # Estimate first stage
    formula = 'SPFfor_Step2 ~ C(ID) + DevFromConsensusfor_Step2'
    for kk in range(1, max_K + 1):
        formula += ' + DevFromConsensusfor_Step2_LAG{}'.format(kk)
    first_stage = smf.ols(formula, data = df_ind).fit()

    # Save fitted values
    df_ind['SPFfor_Step2_fitted'] = first_stage.fittedvalues

    # Estimate idiosyncratic bias coefficients
    IRF_idio = []
    for ll in range(1, max_L + 1):
        formula = 'Efor_Step2_LEAD{} ~ C(ID) + SPFfor_Step2_fitted'.format(ll)
        for kk in range(1, max_K + 1):
            formula += ' + DevFromConsensusfor_Step2_LAG{}'.format(kk)
        model = smf.ols(formula, data = df_ind).fit()
        IRF_idio.append({'lag': ll,
                    'IRF': model.params['SPFfor_Step2_fitted']})
    IRF_idio = pd.DataFrame(IRF_idio)

    # Collect bias coefficients
    df_bias_coefs_boot = IRF_cons.copy()
    df_bias_coefs_boot['IRF'] = (-1) * df_bias_coefs_boot['IRF'] # Convert IRF to bias coefficients
    df_bias_coefs_boot.rename(columns = {'IRF': 'aggregate_empirical'},
                         inplace = True)
    df_bias_coefs_boot = pd.merge(df_bias_coefs_boot, IRF_idio, on = 'lag')
    df_bias_coefs_boot['IRF'] = (-1) * df_bias_coefs_boot['IRF'] # Convert IRF to bias coefficients
    df_bias_coefs_boot.rename(columns = {'IRF': 'idiosyncratic_empirical',
                                    'lag': 'L'},
                         inplace = True)

    # Fit models
    model_fit = fit_all_models(df_bias_coefs_boot, params_DGP)

    # Save results
    df_bias_coefs_boot['B'] = bb + 1
    res_bias_coefs.append(df_bias_coefs_boot)
    model_fit['B'] = bb + 1
    res_calibration.append(model_fit)

    # Print status
    if (bb + 1) % 5 == 0:
        print('Current bootstrap sample = {} out of {}'.format(bb + 1, B))

# Save output
res_bias_coefs = pd.concat(res_bias_coefs)
res_bias_coefs.to_csv('{}/bootstrap_bias_coefs.csv'.format(output_folder), index = False)
res_calibration = pd.concat(res_calibration)
res_calibration.to_csv('{}/bootstrap_model_fit.csv'.format(output_folder), index = False)

# Calculate bootstrap s.e.
df_boot_se = res_calibration.groupby('model').std().reset_index()
mask = df_boot_se['model'] != 'noisy_diagnostic'
df_boot_se = df_boot_se.loc[mask, ]

# For the noisy-diagnostic model, impose
# a bound on theta to avoid undue influence
# from outlier simulations
mask = (res_calibration['model'] == 'noisy_diagnostic') & (res_calibration['theta'] <= theta_se_upper_bnd)
df_temp = res_calibration.loc[mask, ].copy()
df_temp = df_temp.groupby('model').std().reset_index()
df_boot_se = df_boot_se.append(df_temp)

# Clean up
del df_boot_se['B']
df_boot_se.to_csv('{}/bootstrap_se.csv'.format(output_folder), index = False)

############################
## Get s.e. of Delta(SSR) ##
############################

## Noisy-diagnostic vs noisy-misperception model

# Calculate Delta(SSR) on the bootstrap sample
mask = (res_calibration['model'] == 'noisy_diagnostic') | (res_calibration['model'] == 'noisy_misperception')
df_temp = res_calibration.loc[mask, ['model', 'B', 'SSR']].copy()
df_temp = df_temp.pivot(index = 'B', columns = 'model', values = 'SSR').reset_index()

# Calculate p-value
se_Delta_SSR = (df_temp['noisy_diagnostic'] - df_temp['noisy_misperception']).std()
se_output = 's.e.(Delta SSR) = {}'.format(se_Delta_SSR)
with open('{}/se_Delta_SSR_noisy_diagnostic_noisy_misperception.txt'.format(output_folder), "w") as text_file:
    text_file.write(se_output)

# Get a histogram of bootstrap test statistics
fig, ax = plt.subplots(figsize=(4.5, 3.0))
plt.hist(df_temp['noisy_diagnostic'] - df_temp['noisy_misperception'], 
         bins = 50)
plt.xlabel('SSR(Noisy Diagnostic) - SSR(Noisy Misperception)',
           fontsize = 12)
plt.ylabel('Num. of Bootstrap Simulations', 
           fontsize = 12)
fig.tight_layout()
plt.savefig('{}/se_Delta_SSR_noisy_diagnostic_noisy_misperception.png'.format(output_folder), dpi = 600)
plt.savefig('{}/se_Delta_SSR_noisy_diagnostic_noisy_misperception.pgf'.format(output_folder), dpi = 600)
plt.show()

## Misperception vs baseline-noist model

# Calculate Delta(SSR) on the bootstrap sample
mask = (res_calibration['model'] == 'misperception') | (res_calibration['model'] == 'baseline_noisy')
df_temp = res_calibration.loc[mask, ['model', 'B', 'SSR']].copy()
df_temp = df_temp.pivot(index = 'B', columns = 'model', values = 'SSR').reset_index()

# Calculate p-value
se_Delta_SSR = (df_temp['misperception'] - df_temp['baseline_noisy']).std()
se_output = 's.e.(Delta SSR) = {}'.format(se_Delta_SSR)
with open('{}/se_Delta_SSR_misperception_noisy.txt'.format(output_folder), "w") as text_file:
    text_file.write(se_output)

# Get a histogram of bootstrap test statistics
fig, ax = plt.subplots(figsize=(4.5, 3.0))
plt.hist(df_temp['misperception'] - df_temp['baseline_noisy'], 
         bins = 50)
plt.xlabel('SSR(Misperception) - SSR(Baseline Noisy)',
           fontsize = 12)
plt.ylabel('Num. of Bootstrap Simulations', 
           fontsize = 12)
fig.tight_layout()
plt.savefig('{}/se_Delta_SSR_misperception_noisy.png'.format(output_folder), dpi = 600)
plt.savefig('{}/se_Delta_SSR_misperception_noisy.pgf'.format(output_folder), dpi = 600)
plt.show()

#####################
## Get LaTeX table ##
#####################

res = res.sort_values(by = 'SSR')
for col_name in res.columns:
    if col_name != 'model':
        res[col_name] = res[col_name].apply(lambda x: '{:.2f}'.format(x))

output_table = pd.DataFrame()
for index, row in res.iterrows():
    output_table = output_table.append(row)
    mask = df_boot_se['model'].apply(lambda x: row['model'] == x)
    se = df_boot_se.loc[mask, ].copy()
    se['model'] = ''
    for col in se.columns:
        if col != 'model':
            if np.isclose(se[col], 0.0):
                se[col] = ''
            else:
                se[col] = '({:.2f})'.format(se[col].values[0])
    output_table = output_table.append(se)

output_table.rename(columns = {'model': 'Model',
                              'theta': '$\\theta$',
                              'rho_perceived': '$\\hat{\\rho}$',
                              'SSR': 'SSR$_\\text{tot.}$',
                              'G': '$G$',
                              'SSR_agg': 'SSR$_\\text{agg.}$',
                              'SSR_idio': 'SSR$_\\text{idio.}$'},
                    inplace = True)
output_table = output_table[['Model', '$\\theta$', '$\\hat{\\rho}$', 
                             '$G$', 'SSR$_\\text{agg.}$', 'SSR$_\\text{idio.}$', 
                             'SSR$_\\text{tot.}$']]

# Pretty model names
output_table['Model'] = output_table['Model'].replace('noisy_diagnostic', 'Noisy Diagnostic')
output_table['Model'] = output_table['Model'].replace('noisy_misperception', 'Noisy Misperception')
output_table['Model'] = output_table['Model'].replace('misperception', 'Misperception')
output_table['Model'] = output_table['Model'].replace('baseline_noisy', 'Baseline Noisy')
output_table['Model'] = output_table['Model'].replace('FIRE', 'Full-Info Rational')

# Smaller font size for s.e.'s
latex = output_table.to_latex(index = False, escape = False)
latex = latex.replace('(', '{\\footnotesize (')
latex = latex.replace(')', ')}')

with open('{}/calibration.tex'.format(output_folder), "w") as text_file:
    text_file.write(latex)